import scvelo as scv
scv.logging.print_version()
/Users/anaconda3/lib/python3.9/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.4
warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
Running scvelo 0.3.1 (python 3.9.12) on 2024-02-21 23:52.
ERROR: XMLRPC request failed [code: -32500] RuntimeError: PyPI no longer supports 'pip search' (or XML-RPC search). Please use https://pypi.org/search (via a browser) instead. See https://warehouse.pypa.io/api-reference/xml-rpc.html#deprecated-methods for more information.
import scvelo as scv
import scanpy as sc
import cellrank as cr
import numpy as np
import pandas as pd
import anndata as ad
scv.settings.verbosity = 3
scv.settings.set_figure_params('scvelo', facecolor='white', dpi=100, frameon=False)
cr.settings.verbosity = 2
scv.settings.verbosity = 3 # show errors(0), warnings(1), info(2), hints(3)
scv.settings.presenter_view = True # set max width size for presenter view
scv.set_figure_params('scvelo') # for beautified visualization
adata = sc.read('/Users/usri/Desktop/YAO.7feb/feb10/adata3_feb12.scVelo.h5ad')
adata
AnnData object with n_obs × n_vars = 1770 × 1999
obs: 'Barcode', 'old.barcode', 'orig.ident.x', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'nCount_SCT', 'nFeature_SCT', 'SCT_snn_res.0.8', 'seurat_clusters.x', 'SCT_snn_res.0.5', 'clusters', 'percent.mt.x', 'SCT_snn_res.0.3', 'SCT_snn_res.0.2', 'orig.ident.y', 'nCount_RNA', 'nFeature_RNA', 'percent.mt.y', 'RNA_snn_res.0.6', 'seurat_clusters.y', 'Clusters', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'EMTScore.76GS', 'Clusters.old', 'Clusters.number', 'Clusters.term', 'UMAP_1', 'UMAP_2', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'velocity_length', 'velocity_confidence', 'velocity_confidence_transition', 'root_cells', 'end_points', 'velocity_pseudotime'
var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes', 'spearmans_score', 'velocity_score', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'fit_diff_kinetics', 'fit_pval_kinetics'
uns: 'Clusters.number_sizes', 'clusters_colors', 'neighbors', 'paga', 'rank_velocity_genes', 'recover_dynamics', 'velocity_graph', 'velocity_graph_neg', 'velocity_params'
obsm: 'X_pca', 'X_umap', 'velocity_umap'
varm: 'PCs', 'fit_pvals_kinetics', 'loss'
layers: 'Ms', 'Mu', 'ambiguous', 'fit_t', 'fit_tau', 'fit_tau_', 'matrix', 'spliced', 'unspliced', 'variance_velocity', 'velocity'
obsp: 'connectivities', 'distances'
# show proportions of spliced/unspliced abundances
scv.utils.show_proportions(adata)
adata
Abundance of ['unspliced', 'spliced', 'ambiguous']: [0.17 0.81 0.02]
AnnData object with n_obs × n_vars = 1770 × 1999
obs: 'Barcode', 'old.barcode', 'orig.ident.x', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'nCount_SCT', 'nFeature_SCT', 'SCT_snn_res.0.8', 'seurat_clusters.x', 'SCT_snn_res.0.5', 'clusters', 'percent.mt.x', 'SCT_snn_res.0.3', 'SCT_snn_res.0.2', 'orig.ident.y', 'nCount_RNA', 'nFeature_RNA', 'percent.mt.y', 'RNA_snn_res.0.6', 'seurat_clusters.y', 'Clusters', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'EMTScore.76GS', 'Clusters.old', 'Clusters.number', 'Clusters.term', 'UMAP_1', 'UMAP_2', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'velocity_length', 'velocity_confidence', 'velocity_confidence_transition', 'root_cells', 'end_points', 'velocity_pseudotime'
var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes', 'spearmans_score', 'velocity_score', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'fit_diff_kinetics', 'fit_pval_kinetics'
uns: 'Clusters.number_sizes', 'clusters_colors', 'neighbors', 'paga', 'rank_velocity_genes', 'recover_dynamics', 'velocity_graph', 'velocity_graph_neg', 'velocity_params'
obsm: 'X_pca', 'X_umap', 'velocity_umap'
varm: 'PCs', 'fit_pvals_kinetics', 'loss'
layers: 'Ms', 'Mu', 'ambiguous', 'fit_t', 'fit_tau', 'fit_tau_', 'matrix', 'spliced', 'unspliced', 'variance_velocity', 'velocity'
obsp: 'connectivities', 'distances'
#Preprocess the data
scv.pp.filter_genes(adata, min_shared_counts=3)
scv.pp.normalize_per_cell(adata)
scv.pp.filter_genes_dispersion(adata, n_top_genes=3000)
scv.pp.log1p(adata)
Filtered out 623 genes that are detected 3 counts (shared). WARNING: Did not normalize X as it looks processed already. To enforce normalization, set `enforce=True`. WARNING: Did not normalize spliced as it looks processed already. To enforce normalization, set `enforce=True`. WARNING: Did not normalize unspliced as it looks processed already. To enforce normalization, set `enforce=True`. Skip filtering by dispersion since number of variables are less than `n_top_genes`.
/var/folders/fj/btmxy73n74zgn38kbjwfv7zh0000gn/T/ipykernel_36991/2067228601.py:5: DeprecationWarning: `log1p` is deprecated since scVelo v0.3.0 and will be removed in a future version. Please use `log1p` from `scanpy.pp` instead. scv.pp.log1p(adata)
adata
AnnData object with n_obs × n_vars = 1770 × 1376
obs: 'Barcode', 'old.barcode', 'orig.ident.x', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'nCount_SCT', 'nFeature_SCT', 'SCT_snn_res.0.8', 'seurat_clusters.x', 'SCT_snn_res.0.5', 'clusters', 'percent.mt.x', 'SCT_snn_res.0.3', 'SCT_snn_res.0.2', 'orig.ident.y', 'nCount_RNA', 'nFeature_RNA', 'percent.mt.y', 'RNA_snn_res.0.6', 'seurat_clusters.y', 'Clusters', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'EMTScore.76GS', 'Clusters.old', 'Clusters.number', 'Clusters.term', 'UMAP_1', 'UMAP_2', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'velocity_length', 'velocity_confidence', 'velocity_confidence_transition', 'root_cells', 'end_points', 'velocity_pseudotime'
var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes', 'spearmans_score', 'velocity_score', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'fit_diff_kinetics', 'fit_pval_kinetics'
uns: 'Clusters.number_sizes', 'clusters_colors', 'neighbors', 'paga', 'rank_velocity_genes', 'recover_dynamics', 'velocity_graph', 'velocity_graph_neg', 'velocity_params', 'log1p'
obsm: 'X_pca', 'X_umap', 'velocity_umap'
varm: 'PCs', 'fit_pvals_kinetics', 'loss'
layers: 'Ms', 'Mu', 'ambiguous', 'fit_t', 'fit_tau', 'fit_tau_', 'matrix', 'spliced', 'unspliced', 'variance_velocity', 'velocity'
obsp: 'connectivities', 'distances'
scv.pp.filter_and_normalize(adata, min_shared_counts=3, n_top_genes=2000)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
WARNING: Did not normalize X as it looks processed already. To enforce normalization, set `enforce=True`.
WARNING: Did not normalize spliced as it looks processed already. To enforce normalization, set `enforce=True`.
WARNING: Did not normalize unspliced as it looks processed already. To enforce normalization, set `enforce=True`.
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
WARNING: adata.X seems to be already log-transformed.
Logarithmized X.
computing moments based on connectivities
finished (0:00:00) --> added
'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
/Users/anaconda3/lib/python3.9/site-packages/scvelo/preprocessing/utils.py:705: DeprecationWarning: `log1p` is deprecated since scVelo v0.3.0 and will be removed in a future version. Please use `log1p` from `scanpy.pp` instead. log1p(adata)
#Compute velocity and velocity graph
#The gene-specific velocities are obtained by fitting a ratio between precursor (unspliced) and mature (spliced) mRNA abundances that well explains the steady states (constant transcriptional state) and then computing how the observed abundances deviate from what is expected in steady state.
scv.tl.velocity(adata)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
scv.tl.velocity_graph(adata)
computing velocity graph (using 1/8 cores)
0%| | 0/1770 [00:00<?, ?cells/s]
finished (0:00:02) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
adata
AnnData object with n_obs × n_vars = 1770 × 1376
obs: 'Barcode', 'old.barcode', 'orig.ident.x', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'nCount_SCT', 'nFeature_SCT', 'SCT_snn_res.0.8', 'seurat_clusters.x', 'SCT_snn_res.0.5', 'clusters', 'percent.mt.x', 'SCT_snn_res.0.3', 'SCT_snn_res.0.2', 'orig.ident.y', 'nCount_RNA', 'nFeature_RNA', 'percent.mt.y', 'RNA_snn_res.0.6', 'seurat_clusters.y', 'Clusters', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'EMTScore.76GS', 'Clusters.old', 'Clusters.number', 'Clusters.term', 'UMAP_1', 'UMAP_2', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'velocity_length', 'velocity_confidence', 'velocity_confidence_transition', 'root_cells', 'end_points', 'velocity_pseudotime'
var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes', 'spearmans_score', 'velocity_score', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'fit_diff_kinetics', 'fit_pval_kinetics'
uns: 'Clusters.number_sizes', 'clusters_colors', 'neighbors', 'paga', 'rank_velocity_genes', 'recover_dynamics', 'velocity_graph', 'velocity_graph_neg', 'velocity_params', 'log1p'
obsm: 'X_pca', 'X_umap', 'velocity_umap'
varm: 'PCs', 'fit_pvals_kinetics', 'loss'
layers: 'Ms', 'Mu', 'ambiguous', 'fit_t', 'fit_tau', 'fit_tau_', 'matrix', 'spliced', 'unspliced', 'variance_velocity', 'velocity'
obsp: 'connectivities', 'distances'
#Plot results
#Finally, the velocities are projected onto any embedding specified in basis and visualized in one of three available ways: on single cell level, on grid level, or as streamplot as shown here.
scv.pl.velocity_embedding_stream(adata, basis='umap', color=['Clusters.term', 'Clusters.number'])
computing velocity embedding
finished (0:00:00) --> added
'velocity_umap', embedded velocity vectors (adata.obsm)
#Clusters
scv.pl.velocity_embedding_stream(adata, basis='umap', color=['Clusters', 'root_cells'], arrow_size=1, dpi=100)
scv.pl.velocity_embedding(adata, basis='umap', arrow_length=3, arrow_size=2, dpi=200, color='Clusters.number')
#scv.pl.velocity_embedding_grid(adata, color='KRT17', layer=['velocity', 'spliced'], arrow_size=3)
scv.pl.velocity_embedding_grid(adata, color='KRT17', layer=['velocity'], arrow_size=2, arrow_length=3, dpi=150)
scv.pl.velocity_embedding_grid(adata, color='KRT17', layer=['spliced'], arrow_size=2, arrow_length=3, dpi=150)
scv.pl.velocity_embedding_grid(adata, color='KRT20', layer=['velocity'], arrow_size=2, arrow_length=3, dpi=150, )
scv.pl.velocity_embedding_grid(adata, color='KRT20', layer=['spliced'], arrow_size=2, arrow_length=3, dpi=150)
scv.tl.rank_velocity_genes(adata, groupby='Clusters')
#scv.DataFrame(adata.uns['rank_velocity_genes']['names']).head()
ranking velocity genes
finished (0:00:00) --> added
'rank_velocity_genes', sorted scores by group ids (adata.uns)
'spearmans_score', spearmans correlation scores (adata.var)
#adata.uns
var_names = ['KRT17']
scv.pl.velocity(adata, var_names=var_names, colorbar=True, ncols=2)
/Users/anaconda3/lib/python3.9/site-packages/scvelo/plotting/scatter.py:655: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored smp = ax.scatter(
var_names = ['KRT19']
scv.pl.velocity(adata, var_names=var_names, colorbar=True, ncols=2)
/Users/anaconda3/lib/python3.9/site-packages/scvelo/plotting/scatter.py:655: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored smp = ax.scatter(
var_names = ['KRT20']
scv.pl.velocity(adata, var_names=var_names, colorbar=True, ncols=2)
/Users/anaconda3/lib/python3.9/site-packages/scvelo/plotting/scatter.py:655: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored smp = ax.scatter(
scv.pl.velocity_graph(adata, color='Clusters.number' )
scv.pl.velocity_graph(adata, color='Clusters.term' )
#velocity_pseudotime
scv.pl.velocity_graph(adata, color='velocity_pseudotime' )
scv.tl.terminal_states(adata)
scv.pl.velocity_embedding_grid(adata, basis='umap', color=['root_cells', 'end_points'], arrow_size=3, arrow_length=3, dpi=200 )
computing terminal states
identified 1 region of root cells and 1 region of end points .
finished (0:00:00) --> added
'root_cells', root cells of Markov diffusion process (adata.obs)
'end_points', end points of Markov diffusion process (adata.obs)
scv.pl.velocity(adata, basis='umap', var_names=["KRT17", "KRT18", "KRT19"])
/Users/anaconda3/lib/python3.9/site-packages/scvelo/plotting/scatter.py:655: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored smp = ax.scatter( /Users/anaconda3/lib/python3.9/site-packages/scvelo/plotting/scatter.py:655: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored smp = ax.scatter( /Users/anaconda3/lib/python3.9/site-packages/scvelo/plotting/scatter.py:655: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored smp = ax.scatter(
adata.write('/Users/usri/Desktop/YAO.7feb/feb10/adata.RNAvelocity.scapy.20feb.h5ad')
scv.pl.scatter(adata, color=['root_cells', 'end_points'])
#velocity_pseudotime
scv.pl.scatter(adata, color=['velocity_pseudotime'])
scv.tl.latent_time(adata)
scv.pl.scatter(adata,color='latent_time', color_map='gnuplot', colorbar=True)
computing latent time using root_cells as prior
finished (0:00:00) --> added
'latent_time', shared time (adata.obs)
top_genes= adata.var['fit_likelihood'].sort_values(ascending=False).index
scv.pl.scatter(adata, basis=top_genes[:10], ncols=5, frameon=False)
scv.pl.scatter(adata, basis="KRT19", ncols=1, frameon=False)
scv.pl.scatter(adata, basis="KRT17", ncols=1, frameon=False)
scv.pl.scatter(adata, color=['root_cells', 'EMTScore.76GS'])
sc.pl.umap(adata, color="KRT17")
adata.var
| vst.mean | vst.variance | vst.variance.expected | vst.variance.standardized | vst.variable | Accession | Chromosome | End | Start | Strand | ... | fit_likelihood | fit_u0 | fit_s0 | fit_pval_steady | fit_steady_u | fit_steady_s | fit_variance | fit_alignment_scaling | fit_diff_kinetics | fit_pval_kinetics | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| HES4 | 2.011864 | 6.088610 | 4.699831 | 1.295495 | 1 | ENSG00000188290 | 1 | 1000172 | 998962 | - | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | nan | NaN |
| ISG15 | 3.800565 | 29.049519 | 12.996684 | 2.235148 | 1 | ENSG00000187608 | 1 | 1014540 | 1001138 | + | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | nan | NaN |
| AL592464.3 | 0.010169 | 0.129913 | 0.011446 | 1.253532 | 1 | ENSG00000287396 | 1 | 2817767 | 2814056 | + | ... | 0.001567 | 0.0 | 0.0 | 0.255973 | 0.058847 | 0.414124 | 2.964007 | 1.012848 | nan | NaN |
| ERRFI1 | 0.723729 | 1.622331 | 1.115570 | 1.454262 | 1 | ENSG00000116285 | 1 | 8026309 | 8004404 | - | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | nan | NaN |
| GPR157 | 0.502825 | 0.816555 | 0.700716 | 1.165315 | 1 | ENSG00000180758 | 1 | 9129102 | 9100305 | - | ... | 0.177171 | 0.0 | 0.0 | 0.479218 | 0.217318 | 1.264409 | 1.629003 | 2.039069 | nan | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| L1CAM | 0.028814 | 0.042697 | 0.033229 | 1.284924 | 1 | ENSG00000198910 | X | 153886173 | 153861514 | - | ... | 0.111293 | 0.0 | 0.0 | 0.278802 | 0.046617 | 0.183412 | 1.368152 | 1.853301 | nan | NaN |
| NAA10 | 4.915819 | 23.293644 | 20.054323 | 1.161527 | 1 | ENSG00000102030 | X | 153935080 | 153929225 | - | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | nan | NaN |
| FLNA | 1.048588 | 2.958633 | 1.827654 | 1.618814 | 1 | ENSG00000196924 | X | 154374638 | 154348524 | - | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | nan | NaN |
| LAGE3 | 2.146328 | 5.909045 | 5.190976 | 1.138330 | 1 | ENSG00000196976 | X | 154479257 | 154477769 | - | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | nan | NaN |
| CTAG2 | 0.145198 | 0.535717 | 0.175973 | 3.044310 | 1 | ENSG00000126890 | X | 154653579 | 154651972 | - | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | nan | NaN |
1376 rows × 33 columns
# find gene markers for each of the detected leiden clusters
# Perform the ranking
sc.tl.rank_genes_groups(adata, 'Clusters.term', method='wilcoxon')
# The result is stored in adata.uns['rank_genes_groups']
result = adata.uns['rank_genes_groups']
# Initialize a list to hold dataframes for each cluster
df_list = []
# Iterate over clusters
for group in result['names'].dtype.names:
# Create a DataFrame for the current cluster
df = pd.DataFrame({
'Genes': result['names'][group],
'Scores': result['scores'][group],
'Logfoldchanges': result['logfoldchanges'][group],
'pvals': result['pvals'][group],
'pvals_adj': result['pvals_adj'][group],
})
# Add a column for the cluster
df['Cluster'] = group
# Append the DataFrame to the list
df_list.append(df)
# Concatenate all dataframes
df_all = pd.concat(df_list, ignore_index=True)
# Write DataFrame to a csv file
df_all.to_csv('/Users/usri/Desktop/YAO.7feb/feb10/scanpy_marker_genes.csv', index=False)
df_all
| Genes | Scores | Logfoldchanges | pvals | pvals_adj | Cluster | |
|---|---|---|---|---|---|---|
| 0 | APCDD1 | 12.732535 | 7.994702 | 3.899681e-37 | 1.427322e-32 | Basal-like |
| 1 | PCP4 | 12.551043 | 7.022396 | 3.923096e-36 | 7.179462e-32 | Basal-like |
| 2 | OXR1 | 12.412327 | 3.096099 | 2.240448e-35 | 2.733422e-31 | Basal-like |
| 3 | SLC7A8 | 12.032034 | 3.951415 | 2.411465e-33 | 2.206551e-29 | Basal-like |
| 4 | LEF1 | 11.876751 | 6.676805 | 1.563254e-32 | 1.144333e-28 | Basal-like |
| ... | ... | ... | ... | ... | ... | ... |
| 183000 | LDHA | -5.866008 | 0.006572 | 4.464114e-09 | 3.598922e-08 | Stem.TA.2 |
| 183001 | MALAT1 | -7.237833 | -0.441098 | 4.559107e-13 | 5.164589e-12 | Stem.TA.2 |
| 183002 | RPL13A | -7.252900 | -0.107191 | 4.079416e-13 | 4.645634e-12 | Stem.TA.2 |
| 183003 | KRT19 | -8.111961 | -0.298748 | 4.980901e-16 | 7.197234e-15 | Stem.TA.2 |
| 183004 | FABP1 | -8.755919 | -0.765860 | 2.024473e-18 | 3.577872e-17 | Stem.TA.2 |
183005 rows × 6 columns
df_all_f = df_all[(df_all.pvals_adj < 0.05)&(abs(df_all.Logfoldchanges)> 0)]
df_all_f
| Genes | Scores | Logfoldchanges | pvals | pvals_adj | Cluster | |
|---|---|---|---|---|---|---|
| 0 | APCDD1 | 12.732535 | 7.994702 | 3.899681e-37 | 1.427322e-32 | Basal-like |
| 1 | PCP4 | 12.551043 | 7.022396 | 3.923096e-36 | 7.179462e-32 | Basal-like |
| 2 | OXR1 | 12.412327 | 3.096099 | 2.240448e-35 | 2.733422e-31 | Basal-like |
| 3 | SLC7A8 | 12.032034 | 3.951415 | 2.411465e-33 | 2.206551e-29 | Basal-like |
| 4 | LEF1 | 11.876751 | 6.676805 | 1.563254e-32 | 1.144333e-28 | Basal-like |
| ... | ... | ... | ... | ... | ... | ... |
| 183000 | LDHA | -5.866008 | 0.006572 | 4.464114e-09 | 3.598922e-08 | Stem.TA.2 |
| 183001 | MALAT1 | -7.237833 | -0.441098 | 4.559107e-13 | 5.164589e-12 | Stem.TA.2 |
| 183002 | RPL13A | -7.252900 | -0.107191 | 4.079416e-13 | 4.645634e-12 | Stem.TA.2 |
| 183003 | KRT19 | -8.111961 | -0.298748 | 4.980901e-16 | 7.197234e-15 | Stem.TA.2 |
| 183004 | FABP1 | -8.755919 | -0.765860 | 2.024473e-18 | 3.577872e-17 | Stem.TA.2 |
30652 rows × 6 columns
df_all_f.to_csv('/Users/usri/Desktop/YAO.7feb/feb10/scvelo__markers.Clusters.term.csv')
scv.pl.velocity_embedding_stream(adata, basis='umap', color = 'Clusters.term', legend_loc='on data')
scv.tl.velocity_confidence(adata)
keys = 'velocity_length', 'velocity_confidence'
scv.pl.scatter(adata, c = keys, cmap='coolwarm', perc = [5,95])
--> added 'velocity_length' (adata.obs) --> added 'velocity_confidence' (adata.obs)
adata.obs
| Barcode | old.barcode | orig.ident.x | nCount_spliced | nFeature_spliced | nCount_unspliced | nFeature_unspliced | nCount_ambiguous | nFeature_ambiguous | nCount_SCT | ... | initial_size | n_counts | velocity_self_transition | velocity_length | velocity_confidence | velocity_confidence_transition | root_cells | end_points | velocity_pseudotime | latent_time | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| possorted_genome_bam_K9RX7:AAACCCAGTCCGAAGAx | AAACCCAGTCCGAAGA-1 | possorted_genome_bam_K9RX7:AAACCCAGTCCGAAGAx | possorted | 22913.0 | 4969.0 | 5090.0 | 2782.0 | 3272.0 | 1358.0 | 16128.0 | ... | 22913.0 | NaN | 0.102989 | 9.460000 | 0.921991 | 0.133375 | 0.017141 | 0.169256 | 0.130356 | 0.537701 |
| possorted_genome_bam_K9RX7:AAACCCATCACTGTCCx | AAACCCATCACTGTCC-1 | possorted_genome_bam_K9RX7:AAACCCATCACTGTCCx | possorted | 27440.0 | 5237.0 | 5162.0 | 2843.0 | 3534.0 | 1437.0 | 16040.0 | ... | 27440.0 | NaN | 0.232820 | 10.370000 | 0.928700 | 0.118324 | 0.063491 | 0.194884 | 0.117105 | 0.471891 |
| possorted_genome_bam_K9RX7:AAACGCTAGGCTGGATx | AAACGCTAGGCTGGAT-1 | possorted_genome_bam_K9RX7:AAACGCTAGGCTGGATx | possorted | 15336.0 | 4196.0 | 4478.0 | 2170.0 | 2441.0 | 964.0 | 15307.0 | ... | 15336.0 | NaN | 0.232637 | 24.530001 | 0.879951 | 0.023200 | 0.582353 | 0.061875 | 0.201713 | 0.498656 |
| possorted_genome_bam_K9RX7:AAACGCTCATGAGTAAx | AAACGCTCATGAGTAA-1 | possorted_genome_bam_K9RX7:AAACGCTCATGAGTAAx | possorted | 23971.0 | 4398.0 | 3746.0 | 1987.0 | 2856.0 | 1079.0 | 15695.0 | ... | 23971.0 | NaN | 0.162755 | 8.260000 | 0.928741 | 0.092724 | 0.213005 | 0.039025 | 0.230673 | 0.213358 |
| possorted_genome_bam_K9RX7:AAAGAACCAACTGAAAx | AAAGAACCAACTGAAA-1 | possorted_genome_bam_K9RX7:AAAGAACCAACTGAAAx | possorted | 7347.0 | 2784.0 | 2622.0 | 1656.0 | 1071.0 | 633.0 | 14497.0 | ... | 7347.0 | NaN | 0.322005 | 13.040000 | 0.907942 | -0.043836 | 0.141615 | 0.071429 | 0.083507 | 0.360468 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| possorted_genome_bam_K9RX7:TTTGATCTCAATCGGTx | TTTGATCTCAATCGGT-1 | possorted_genome_bam_K9RX7:TTTGATCTCAATCGGTx | possorted | 28138.0 | 5320.0 | 6734.0 | 3200.0 | 3665.0 | 1412.0 | 15979.0 | ... | 28138.0 | NaN | 0.175585 | 8.380000 | 0.958243 | 0.046899 | 0.023345 | 0.827413 | 0.135706 | 0.515914 |
| possorted_genome_bam_K9RX7:TTTGGAGCAGGCACTCx | TTTGGAGCAGGCACTC-1 | possorted_genome_bam_K9RX7:TTTGGAGCAGGCACTCx | possorted | 25570.0 | 5454.0 | 7959.0 | 3454.0 | 3740.0 | 1506.0 | 16175.0 | ... | 25570.0 | NaN | 0.310259 | 23.280001 | 0.875293 | -0.116175 | 0.110187 | 0.021070 | 0.107341 | 0.499428 |
| possorted_genome_bam_K9RX7:TTTGGAGGTAACATGAx | TTTGGAGGTAACATGA-1 | possorted_genome_bam_K9RX7:TTTGGAGGTAACATGAx | possorted | 23972.0 | 5173.0 | 3411.0 | 2096.0 | 3153.0 | 1283.0 | 16057.0 | ... | 23972.0 | NaN | 0.260342 | 12.710000 | 0.971284 | -0.128660 | 0.027016 | 0.354314 | 0.139483 | 0.536332 |
| possorted_genome_bam_K9RX7:TTTGGAGTCGTGGGTCx | TTTGGAGTCGTGGGTC-1 | possorted_genome_bam_K9RX7:TTTGGAGTCGTGGGTCx | possorted | 4873.0 | 1973.0 | 1029.0 | 784.0 | 613.0 | 394.0 | 14611.0 | ... | 4873.0 | NaN | 0.087861 | 10.470000 | 0.928094 | 0.121362 | 0.146675 | 0.144499 | 0.330151 | 0.234698 |
| possorted_genome_bam_K9RX7:TTTGGAGTCTGCGGACx | TTTGGAGTCTGCGGAC-1 | possorted_genome_bam_K9RX7:TTTGGAGTCTGCGGACx | possorted | 53582.0 | 6531.0 | 9854.0 | 3972.0 | 6494.0 | 1875.0 | 15283.0 | ... | 53582.0 | NaN | 0.061931 | 9.140000 | 0.940884 | 0.223247 | 0.036231 | 0.070889 | 0.095348 | 0.276820 |
1770 rows × 45 columns
#df = adata.obs.groupby('Clusters.term')[keys].mean().T
#df.style.background_gradient(cmap='coolwarm', axis=1)
scv.pl.paga(adata, basis='umap', size=50, alpha=.1, min_edge_width=2, node_size_scale=1.5, color = 'Clusters.term')
WARNING: Invalid color key. Using grey instead.
/Users/anaconda3/lib/python3.9/site-packages/networkx/convert.py:157: DeprecationWarning: The scipy.sparse array containers will be used instead of matrices in Networkx 3.0. Use `from_scipy_sparse_array` instead. return nx.from_scipy_sparse_matrix(data, create_using=create_using) /Users/anaconda3/lib/python3.9/site-packages/networkx/convert.py:157: DeprecationWarning: The scipy.sparse array containers will be used instead of matrices in Networkx 3.0. Use `from_scipy_sparse_array` instead. return nx.from_scipy_sparse_matrix(data, create_using=create_using)
op_genes = adata.var['velocity_qreg_ratio'].sort_values(ascending=False).index[:600]
scv.pl.heatmap(adata, var_names=top_genes, sortby='latent_time', col_color='Clusters.term', n_convolve=100)
/Users/anaconda3/lib/python3.9/site-packages/seaborn/matrix.py:213: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. self.cmap = mpl.cm.get_cmap(cmap)
var_names = ['KRT17', 'KRT19', 'KRT20']
scv.pl.scatter(adata, var_names, color = ['Clusters.term'], frameon=False)
#scv.pl.scatter(adata, x='latent_time', color = ['Clusters.term'],y=var_names, frameon=False)
#Cluster-specific top-likelihood genes
#Moreover, partial gene likelihoods can be computed for a each cluster of cells to enable cluster-specific identification of potential drivers.
scv.tl.rank_dynamical_genes(adata, groupby='Clusters.term')
df = scv.get_df(adata, 'rank_dynamical_genes/names')
df.head(50)
ranking genes by cluster-specific likelihoods
finished (0:00:00) --> added
'rank_dynamical_genes', sorted scores by group ids (adata.uns)
| Basal-like | Entero-like | Sec-like | Stem.TA.1 | Stem.TA.2 | |
|---|---|---|---|---|---|
| 0 | RBP1 | PAPSS2 | LIMA1 | GPA33 | KCTD16 |
| 1 | KCNJ8 | DACH1 | ATAD2 | KIT | KIF4A |
| 2 | FAM171B | LPIN2 | MYH9 | LGALS4 | GPA33 |
| 3 | ANKRD34B | NMU | PRKDC | KCTD16 | HNMT |
| 4 | ATAD2 | KIT | GPA33 | IQGAP2 | PGK1 |
| 5 | MT4 | RND3 | PLPP1 | LPIN2 | LGALS4 |
| 6 | TMPRSS11E | PRKDC | LDHA | HMGCS2 | ATAD2 |
| 7 | HMCN1 | TDP2 | G2E3 | ATAD2 | LPIN2 |
| 8 | TCEA2 | PLPP1 | TPD52L1 | ZNF704 | FOS |
| 9 | TRPC3 | P4HA1 | CALB1 | TPX2 | ZNF704 |
| 10 | CPVL | C15orf48 | TSPAN8 | PGK1 | TPX2 |
| 11 | ISM1 | FER1L6 | INSIG1 | DBF4 | RND3 |
| 12 | CDK19 | KCTD16 | CENPF | PAPSS2 | CENPF |
| 13 | HAS2 | HMGCS2 | SLC4A7 | CDC25A | CLSPN |
| 14 | AL355916.1 | CA9 | TUBA1C | TCOF1 | PARM1 |
| 15 | IDI1 | TSPAN5 | PIF1 | P4HA1 | TMEM45B |
| 16 | PCP4 | AIG1 | HMGCS1 | FOS | IQGAP2 |
| 17 | SPTSSB | RHBDL2 | TPX2 | CDCA4 | BLM |
| 18 | LINC01116 | NXPE4 | NCAPG2 | TSPAN8 | CDCA4 |
| 19 | CCDC80 | CD276 | BMP4 | AKAP7 | CALB1 |
| 20 | OSBPL6 | BCAS1 | SKA3 | CLSPN | NMU |
| 21 | KIT | MAP4K4 | RRM1 | NCAPH | FANCI |
| 22 | APCDD1 | PFKFB4 | SLC12A2 | TMEM45B | DBF4 |
| 23 | SERPINI1 | SRPX2 | XRCC2 | CALB1 | FAM83D |
| 24 | ZNF618 | PARM1 | TSC22D1 | RND3 | TSPAN8 |
| 25 | GNAI1 | TSC22D1 | KRT18 | TSC22D1 | LIMA1 |
| 26 | TRDC | FAM222A | EFNB2 | KIF4A | TCOF1 |
| 27 | CLCA2 | LGALS4 | ARL6IP1 | CENPF | TSC22D1 |
| 28 | LEF1 | FOS | TROAP | HNMT | EXO1 |
| 29 | FGF20 | HNMT | ATP2B1 | KIF11 | KIF11 |
| 30 | ZNF704 | YPEL2 | MELK | CDK19 | IDI1 |
| 31 | HEPHL1 | SLC12A2 | DBF4 | NMU | FANCB |
| 32 | MMP2 | SORL1 | TRIM31 | NCAPG2 | MELK |
| 33 | G2E3 | CDK19 | CEP152 | PLPP1 | NCAPH |
| 34 | SLC1A3 | IDI1 | SULT1B1 | PARM1 | HMGCS2 |
| 35 | TSPAN5 | HHLA2 | KIFC1 | FAM83D | DSCC1 |
| 36 | SRPX | ART3 | HJURP | KIF2C | PLPP1 |
| 37 | CALB1 | BNIP3L | KIF20B | FANCB | ALCAM |
| 38 | YPEL5 | YPEL5 | ARHGEF39 | IDI1 | TUBA1C |
| 39 | PRRX1 | CEMIP | MSMO1 | TDP2 | G2E3 |
| 40 | CHST11 | NR1H4 | MORC4 | TUBA1C | E2F8 |
| 41 | FAM178B | TNFSF15 | ATF3 | WDHD1 | EFNB2 |
| 42 | LRRN3 | LGI4 | C10orf99 | SGO2 | RRM1 |
| 43 | GATA3 | TRIB1 | PDCD4 | MAP4K4 | AIG1 |
| 44 | RAB7B | HAVCR1 | ESCO2 | CASP8AP2 | CDCA2 |
| 45 | MAN1A1 | TPD52L1 | NDC80 | COLCA2 | PAPSS2 |
| 46 | FHOD3 | LIPH | KIF14 | LIMA1 | MAOA |
| 47 | AC005162.3 | BHLHE40 | MSH6 | SYTL2 | MCM10 |
| 48 | P4HA1 | MYH9 | KRT19 | PRKDC | CCNE2 |
| 49 | MAP2 | ARHGAP29 | XAF1 | ALCAM | CD276 |
kwargs = dict(frameon=False, size=10, linewidth=1.5,
add_outline='Basal-like, Entero-like, Sec-like, Stem.TA.1, Stem.TA.2')
scv.pl.scatter(adata, df['Basal-like'][:8], ylabel='Basal-like', **kwargs)
scv.pl.scatter(adata, df['Entero-like'][:8], ylabel='Entero-like', **kwargs)
scv.pl.scatter(adata, df['Sec-like'][:8], ylabel='Sec-like', **kwargs)
scv.pl.scatter(adata, df['Stem.TA.1'][:8], ylabel='Stem.TA.1', **kwargs)
scv.pl.scatter(adata, df['Stem.TA.2'][:8], ylabel='Stem.TA.2', **kwargs)
#Velocities in cycling progenitors¶
#The cell cycle detected by RNA velocity, is biologically affirmed by cell cycle scores (standardized scores of mean expression levels of phase marker genes).
scv.tl.score_genes_cell_cycle(adata)
scv.pl.scatter(adata, color_gradients=['S_score', 'G2M_score'], smooth=True, perc=[5, 95])
calculating cell cycle phase --> 'S_score' and 'G2M_score', scores of cell cycle phases (adata.obs)
s_genes, g2m_genes = scv.utils.get_phase_marker_genes(adata)
s_genes = scv.get_df(adata[:, s_genes], 'spearmans_score', sort_values=True).index
g2m_genes = scv.get_df(adata[:, g2m_genes], 'spearmans_score', sort_values=True).index
kwargs = dict(frameon=False, ylabel='cell cycle genes')
scv.pl.scatter(adata, list(s_genes[:2]) + list(g2m_genes[:3]), **kwargs)
#Top-likelihood genes:: Identified driver genes with pronounced dynamic behavior
#Driver genes display pronounced dynamic behavior and are systematically detected via their characterization by high likelihoods in the dynamic model
top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index
scv.pl.scatter(adata, basis=top_genes[:20], ncols=5, frameon=False)
top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index[:300]
scv.pl.heatmap(adata, var_names=top_genes, sortby='latent_time', col_color='clusters', n_convolve=100)
/Users/anaconda3/lib/python3.9/site-packages/seaborn/matrix.py:213: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. self.cmap = mpl.cm.get_cmap(cmap)
var_names = ['KRT17', 'KRT20', 'KRT19']
scv.pl.scatter(adata, var_names, frameon=False)
scv.pl.scatter(adata, x='latent_time', y=var_names, frameon=False)
#Testing top-likelihood genes¶
#Screening through the top-likelihood genes, we find some gene-wise dynamics that display multiple kinetic regimes.
scv.tl.recover_dynamics(adata)
top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index[:100]
scv.tl.differential_kinetic_test(adata, var_names=top_genes, groupby='Clusters.term')
scv.tl.velocity_pseudotime(adata)
scv.pl.scatter(adata, color='velocity_pseudotime', cmap='gnuplot')
scv.tl.latent_time(adata)
scv.pl.scatter(adata, color='latent_time', color_map='gnuplot', size=80)
top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index[:300]
scv.pl.heatmap(adata, var_names=top_genes, sortby='latent_time', col_color='clusters', n_convolve=100)
scv.pl.velocity_embedding_stream(adata, basis='umap', color='Clusters')
scv.pl.paga(adata, basis='umap', size=50, alpha=.1,
min_edge_width=2, node_size_scale=1.5)
WARNING: Invalid color key. Using grey instead.
/Users/anaconda3/lib/python3.9/site-packages/networkx/convert.py:157: DeprecationWarning: The scipy.sparse array containers will be used instead of matrices in Networkx 3.0. Use `from_scipy_sparse_array` instead. return nx.from_scipy_sparse_matrix(data, create_using=create_using)
scv.tl.velocity(adata, mode='steady_state')
scv.tl.velocity(adata, mode='stochastic')
scv.tl.velocity(adata, mode='dynamical')
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocities
finished (0:00:00) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocities
finished (0:00:01) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
scv.pl.scatter(adata, basis=[top_genes, ])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Input In [69], in <cell line: 1>() ----> 1 scv.pl.scatter(adata, basis=[top_genes, ]) File /Users/anaconda3/lib/python3.9/site-packages/scvelo/plotting/scatter.py:204, in scatter(adata, basis, x, y, vkey, color, use_raw, layer, color_map, colorbar, palette, size, alpha, linewidth, linecolor, perc, groups, sort_order, components, projection, legend_loc, legend_loc_lines, legend_fontsize, legend_fontweight, legend_fontoutline, legend_align_text, xlabel, ylabel, title, fontsize, figsize, xlim, ylim, add_density, add_assignments, add_linfit, add_polyfit, add_rug, add_text, add_text_pos, add_margin, add_outline, outline_width, outline_color, n_convolve, smooth, normalize_data, rescale_color, color_gradients, dpi, frameon, zorder, ncols, nrows, wspace, hspace, show, save, ax, **kwargs) 202 multikey = unique(multikey) # take unique set if no more than one multikey 203 if len(multikey) > 20: --> 204 raise ValueError("Please restrict the passed list to max 20 elements.") 205 if ax is not None: 206 logg.warn("Cannot specify `ax` when plotting multiple panels.") ValueError: Please restrict the passed list to max 20 elements.
scv.pl.scatter(adata, basis="KRT17")
scv.tl.velocity(adata, mode='stochastic')
@scv.tl.velocity_graph(adata)
#scv.tl.umap(adata)
#scv.pl.velocity_embedding_stream(adata, basis='umap', color=['clusters'])
Input In [71] #scv.pl.velocity_embedding_stream(adata, basis='umap', color=['clusters']) ^ SyntaxError: unexpected EOF while parsing
#top likelihood genes
print(adata.var['velocity_genes'].sum(), adata.n_vars)
top_genes = adata.var_names[adata.var.fit_likelihood.argsort()[::-1]]
scv.pl.scatter(adata, basis=top_genes[:10], ncols=5)
438 1376